import pandas as pd
import datetime
import os
import plotly.express as px
# Highway vehicle detections
df_traffic_data_autobahn_total = None
# assign directory
directory = 'traffic data/autobahn data'
# iterate over files in
# that directory
for filename in os.listdir(directory):
file_path = os.path.join(directory, filename)
# checking if it is a file
if os.path.isfile(file_path):
print(file_path)
# Open File
df_traffic_data_autobahn_year = pd.read_csv(file_path, delimiter=';')
df_traffic_data_autobahn_year['Datum'] = '20' + df_traffic_data_autobahn_year['Datum'].apply(str)
df_traffic_data_autobahn_year['Datum'] = pd.to_datetime(df_traffic_data_autobahn_year['Datum'], format='%Y%m%d')
df_traffic_data_autobahn_year['Autobahn_KFZ_Total'] = df_traffic_data_autobahn_year['KFZ_R1'] + df_traffic_data_autobahn_year['KFZ_R2']
df_traffic_data_autobahn_year = df_traffic_data_autobahn_year[['Datum','Autobahn_KFZ_Total']]
# Step 4: Group KFZ_R1 and KFZ_R2 values on date.
df_traffic_data_autobahn_year = df_traffic_data_autobahn_year.groupby('Datum').sum('Autobahn_KFZ_Total')
if df_traffic_data_autobahn_total is None:
df_traffic_data_autobahn_total = df_traffic_data_autobahn_year
else:
df_traffic_data_autobahn_total = pd.concat([df_traffic_data_autobahn_total, df_traffic_data_autobahn_year])
df_traffic_data_autobahn_total['Autobahn_KFZ_Total_Rolling_Average_7_Days'] = df_traffic_data_autobahn_total.rolling(7, min_periods=1).mean()
# Read File
df_traffic_data_autobahn_total
traffic data/autobahn data\2017_A_S.txt traffic data/autobahn data\2018_A_S.txt traffic data/autobahn data\2019_A_S.txt traffic data/autobahn data\2020_A_S.txt traffic data/autobahn data\2021_A_S.txt
| Autobahn_KFZ_Total | Autobahn_KFZ_Total_Rolling_Average_7_Days | |
|---|---|---|
| Datum | ||
| 2017-01-01 | 26975627 | 2.697563e+07 |
| 2017-01-02 | 41560509 | 3.426807e+07 |
| 2017-01-03 | 39117647 | 3.588459e+07 |
| 2017-01-04 | 36828052 | 3.612046e+07 |
| 2017-01-05 | 39337185 | 3.676380e+07 |
| ... | ... | ... |
| 2021-12-27 | 42884833 | 4.293691e+07 |
| 2021-12-28 | 41452599 | 4.184089e+07 |
| 2021-12-29 | 41882438 | 4.037421e+07 |
| 2021-12-30 | 43390035 | 3.904579e+07 |
| 2021-12-31 | 26765605 | 3.820287e+07 |
1826 rows × 2 columns
# Federal roads vehicle detections
df_traffic_data_bundesstrassen_total = None
# assign directory
directory = 'traffic data/bundesstrassen data'
# iterate over files in
# that directory
for filename in os.listdir(directory):
file_path = os.path.join(directory, filename)
# checking if it is a file
if os.path.isfile(file_path):
print(file_path)
# Open File
df_traffic_data_bundesstrassen_year = pd.read_csv(file_path, delimiter=';', low_memory=False)
df_traffic_data_bundesstrassen_year['Datum'] = '20' + df_traffic_data_bundesstrassen_year['Datum'].apply(str)
df_traffic_data_bundesstrassen_year['Datum'] = pd.to_datetime(df_traffic_data_bundesstrassen_year['Datum'], format='%Y%m%d')
df_traffic_data_bundesstrassen_year['Bundesstrassen_KFZ_Total'] = df_traffic_data_bundesstrassen_year['KFZ_R1'] + df_traffic_data_bundesstrassen_year['KFZ_R2']
df_traffic_data_bundesstrassen_year = df_traffic_data_bundesstrassen_year[['Datum','Bundesstrassen_KFZ_Total']]
# Step 4: Group KFZ_R1 and KFZ_R2 values on date.
df_traffic_data_bundesstrassen_year = df_traffic_data_bundesstrassen_year.groupby('Datum').sum('Bundesstrassen_KFZ_Total')
if df_traffic_data_bundesstrassen_total is None:
df_traffic_data_bundesstrassen_total = df_traffic_data_bundesstrassen_year
else:
df_traffic_data_bundesstrassen_total = pd.concat([df_traffic_data_bundesstrassen_total, df_traffic_data_bundesstrassen_year])
df_traffic_data_bundesstrassen_total['Bundesstrassen_KFZ_Total_Rolling_Average_7_Days'] = df_traffic_data_bundesstrassen_total.rolling(7, min_periods=1).mean()
# Read File
df_traffic_data_bundesstrassen_total
traffic data/bundesstrassen data\2017_B_S.txt traffic data/bundesstrassen data\2018_B_S.txt traffic data/bundesstrassen data\2019_B_S.txt traffic data/bundesstrassen data\2020_B_S.txt traffic data/bundesstrassen data\2021_B_S.txt
| Bundesstrassen_KFZ_Total | Bundesstrassen_KFZ_Total_Rolling_Average_7_Days | |
|---|---|---|
| Datum | ||
| 2017-01-01 | 3886847 | 3.886847e+06 |
| 2017-01-02 | 7362853 | 5.624850e+06 |
| 2017-01-03 | 7740331 | 6.330010e+06 |
| 2017-01-04 | 7537968 | 6.632000e+06 |
| 2017-01-05 | 8039175 | 6.913435e+06 |
| ... | ... | ... |
| 2021-12-27 | 7923371 | 7.957952e+06 |
| 2021-12-28 | 8050999 | 7.596847e+06 |
| 2021-12-29 | 8310961 | 7.272411e+06 |
| 2021-12-30 | 8555059 | 7.103476e+06 |
| 2021-12-31 | 5568421 | 7.044972e+06 |
1826 rows × 2 columns
# Plot and check highway vehicle detections data
fig = px.line(df_traffic_data_autobahn_total, x=df_traffic_data_autobahn_total.index, y='Autobahn_KFZ_Total', markers = True)
fig.show()
# Plot and check highway vehicle detections smoothed moving average 7 days data
fig = px.line(df_traffic_data_autobahn_total, x=df_traffic_data_autobahn_total.index, y='Autobahn_KFZ_Total_Rolling_Average_7_Days', markers = True)
fig.show()
# Plot and check federal roads vehicle detections data
fig = px.line(df_traffic_data_bundesstrassen_total, x=df_traffic_data_bundesstrassen_total.index, y='Bundesstrassen_KFZ_Total', markers = True)
fig.show()
# Plot and check federal roads vehicle detections smoothed moving average 7 days data
fig = px.line(df_traffic_data_bundesstrassen_total, x=df_traffic_data_bundesstrassen_total.index, y='Bundesstrassen_KFZ_Total_Rolling_Average_7_Days', markers = True)
fig.show()
# Combine the two dataframes using a outer join
# This allows for all the information to be available per day on row level
df_traffic_data_total = pd.merge(df_traffic_data_autobahn_total, df_traffic_data_bundesstrassen_total, how='left', on = 'Datum')
# Add the vehicle detections for highways and federal roads to get the total result
df_traffic_data_total['Total'] = df_traffic_data_total['Autobahn_KFZ_Total'] + df_traffic_data_total['Bundesstrassen_KFZ_Total']
df_traffic_data_total['Total_Rolling_Average_7_Days'] = df_traffic_data_total['Autobahn_KFZ_Total_Rolling_Average_7_Days'] + df_traffic_data_total['Bundesstrassen_KFZ_Total_Rolling_Average_7_Days']
# Show total dataframe
df_traffic_data_total
| Autobahn_KFZ_Total | Autobahn_KFZ_Total_Rolling_Average_7_Days | Bundesstrassen_KFZ_Total | Bundesstrassen_KFZ_Total_Rolling_Average_7_Days | Total | Total_Rolling_Average_7_Days | |
|---|---|---|---|---|---|---|
| Datum | ||||||
| 2017-01-01 | 26975627 | 2.697563e+07 | 3886847 | 3.886847e+06 | 30862474 | 3.086247e+07 |
| 2017-01-02 | 41560509 | 3.426807e+07 | 7362853 | 5.624850e+06 | 48923362 | 3.989292e+07 |
| 2017-01-03 | 39117647 | 3.588459e+07 | 7740331 | 6.330010e+06 | 46857978 | 4.221460e+07 |
| 2017-01-04 | 36828052 | 3.612046e+07 | 7537968 | 6.632000e+06 | 44366020 | 4.275246e+07 |
| 2017-01-05 | 39337185 | 3.676380e+07 | 8039175 | 6.913435e+06 | 47376360 | 4.367724e+07 |
| ... | ... | ... | ... | ... | ... | ... |
| 2021-12-27 | 42884833 | 4.293691e+07 | 7923371 | 7.957952e+06 | 50808204 | 5.089486e+07 |
| 2021-12-28 | 41452599 | 4.184089e+07 | 8050999 | 7.596847e+06 | 49503598 | 4.943774e+07 |
| 2021-12-29 | 41882438 | 4.037421e+07 | 8310961 | 7.272411e+06 | 50193399 | 4.764662e+07 |
| 2021-12-30 | 43390035 | 3.904579e+07 | 8555059 | 7.103476e+06 | 51945094 | 4.614927e+07 |
| 2021-12-31 | 26765605 | 3.820287e+07 | 5568421 | 7.044972e+06 | 32334026 | 4.524784e+07 |
1826 rows × 6 columns
# Write dataframe to the output file
df_traffic_data_total.to_csv('traffic data output/traffic data out.csv')